{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导入基本模块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "import os\n",
    "from os import path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "warnings.filterwarnings(action='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 定义函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_frequency(data1, treshold = 1):\n",
    "    '''\n",
    "    data1.index.week表示一年中第几个星期。比如1月2日表示第1周。12月是50多周\n",
    "    可以将所处周相同的。作为新的一个特征，然后使用groupby进行分类。统计周出现的频率。月出现的频率\n",
    "    '''\n",
    "    data1['Frequency'] = data1.imsi_.map(data1.imsi_.value_counts()) # 频率使用的是imsi_出现的次数\n",
    "    data1 = data1[data1.Frequency >= treshold]\n",
    "    data1.index = pd.to_datetime(data1.update_time_)\n",
    "    data1['day'] = data1.index.day # 要统计一个imsi_每天，每周，每年出现的次数，可以增加新的特征列\n",
    "    data1['week'] = data1.index.week\n",
    "    data1['month'] = data1.index.month\n",
    "    day = data1.groupby(['imsi_',data1.day])['device_id_'].value_counts().unstack(fill_value=0)\n",
    "    week = data1.groupby(['imsi_',data1.week])['device_id_'].value_counts().unstack(fill_value=0)\n",
    "    month = data1.groupby(['imsi_',data1.month])['device_id_'].value_counts().unstack(fill_value=0)\n",
    "    print('data processed successfully')\n",
    "    return day, week, month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data(file_name):\n",
    "    '''\n",
    "    传入的是文件路径\n",
    "    读取并对数据进行处理\n",
    "    '''\n",
    "    file_extension = path.splitext(file_name)[1]\n",
    "    if file_extension == '.csv':\n",
    "        data = pd.read_csv(file_name, encoding='gbk')\n",
    "    elif file_extension == '.xlsx':\n",
    "        data = pd.read_excel(file_name, encoding='gbk')\n",
    "    drop_col = ['imei_', 'area_', 'msisdn_']\n",
    "    data.drop(drop_col, axis=1, inplace=True)\n",
    "    col = data.columns\n",
    "    for each in col:\n",
    "        if data[each].dtype == object:\n",
    "            data[each] = data[each].str.replace('\\t', '')\n",
    "    print('data load successfully')\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "def transform(day, week, month):\n",
    "    day_d = {}\n",
    "    week_d = {}\n",
    "    month_d = {}\n",
    "    day, week, month = day.reset_index(level=1), week.reset_index(level=1), month.reset_index(level=1)\n",
    "    day_user_id = list(set(day.index)) # 获取数据的基础id\n",
    "    week_user_id = list(set(week.index))\n",
    "    month_user_id = list(set(month.index))\n",
    "    for each in day_user_id:\n",
    "        day_d = {}\n",
    "        try:\n",
    "            day_d.update(day.loc[each,:].to_dict('list'))\n",
    "        except TypeError:\n",
    "            day_d.update(day.loc[each,:].to_frame().T.to_dict('list'))\n",
    "    for each in week_user_id:\n",
    "        week_d = {}\n",
    "        try:\n",
    "            week_d.update(week.loc[each,:].to_dict('list'))\n",
    "        except TypeError:\n",
    "            week_d.update(week.loc[each,:].to_frame().T.to_dict('list'))\n",
    "    for each in month_user_id:\n",
    "        month_d = {}\n",
    "        try:\n",
    "            month_d.update(month.loc[each,:].to_dict('list'))\n",
    "        except TypeError:\n",
    "            month_d.update(month.loc[each,:].to_frame().T.to_dict('list'))\n",
    "    print('data transform successfully')\n",
    "    return day_d, week_d, month_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "    '''\n",
    "    定义主函数，方便运行\n",
    "    '''\n",
    "    os.listdir('../Project Data/')\n",
    "    os.chdir('../Project Data/') # 切换到数据所在目录\n",
    "    data = get_data('20190524-001.csv')\n",
    "    day, week, month = get_frequency(data, treshold=2)\n",
    "    day, week, month = transform(day, week, month)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 运行代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data load successfully\n",
      "data processed successfully\n",
      "data transform successfully\n"
     ]
    }
   ],
   "source": [
    "main()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 代码调试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "os.listdir('../Project Data/')\n",
    "\n",
    "os.chdir('../Project Data/') # 切换到数据所在目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data load successfully\n"
     ]
    }
   ],
   "source": [
    "data = get_data('20190524-001.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>imsi_</th>\n",
       "      <th>update_time_</th>\n",
       "      <th>lac_</th>\n",
       "      <th>cellid_</th>\n",
       "      <th>status_</th>\n",
       "      <th>ap_type_</th>\n",
       "      <th>device_id_</th>\n",
       "      <th>eventid_</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>460110413044304</td>\n",
       "      <td>2019-05-08 20:09:29</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>460110413042577</td>\n",
       "      <td>2019-05-08 20:09:41</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>460110425023300</td>\n",
       "      <td>2019-05-08 20:10:14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>460110714141865</td>\n",
       "      <td>2019-05-08 20:10:40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>460016692088409</td>\n",
       "      <td>2019-05-08 20:08:55</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>LT-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             imsi_         update_time_  lac_  cellid_ status_ ap_type_  \\\n",
       "0  460110413044304  2019-05-08 20:09:29     0        0      拒绝  FDD-LTE   \n",
       "1  460110413042577  2019-05-08 20:09:41     0        0      拒绝  FDD-LTE   \n",
       "2  460110425023300  2019-05-08 20:10:14     0        0      拒绝  FDD-LTE   \n",
       "3  460110714141865  2019-05-08 20:10:40     0        0      拒绝  FDD-LTE   \n",
       "4  460016692088409  2019-05-08 20:08:55     0        0      拒绝  FDD-LTE   \n",
       "\n",
       "   device_id_    eventid_  \n",
       "0  DX-SZSC001  Normal Lau  \n",
       "1  DX-SZSC001  Normal Lau  \n",
       "2  DX-SZSC001  Normal Lau  \n",
       "3  DX-SZSC001  Normal Lau  \n",
       "4  LT-SZSC001  Normal Lau  "
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data processed successfully\n"
     ]
    }
   ],
   "source": [
    "day, week, month = get_frequency(data, treshold=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "day = day.reset_index(level=1)  # https://www.cnblogs.com/beyondChan/p/10926788.html\n",
    "\n",
    "# multiple index 的方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>device_id_</th>\n",
       "      <th>day</th>\n",
       "      <th>DX-SZSC001</th>\n",
       "      <th>LT-SZSC001</th>\n",
       "      <th>YD-SZSC001</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>imsi_</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "device_id_       day  DX-SZSC001  LT-SZSC001  YD-SZSC001\n",
       "imsi_                                                   \n",
       "222992312514644    8           0           1           0\n",
       "222992312514644    9           0           2           0\n",
       "222992312514644   10           0           2           0\n",
       "222992312514644   11           0           2           0\n",
       "222992312514644   13           0           4           0"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "day.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(week.index) == set(month.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "unsupported type: <class 'str'>",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-159-693183100d4b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mday\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m460079768010752\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'list'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;31m# 直接这样报错，原因是数据类型是Series，转换成DF即可\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mto_dict\u001b[0;34m(self, into)\u001b[0m\n\u001b[1;32m   1558\u001b[0m         \"\"\"\n\u001b[1;32m   1559\u001b[0m         \u001b[0;31m# GH16122\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1560\u001b[0;31m         \u001b[0minto_c\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstandardize_mapping\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minto\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1561\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0minto_c\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1562\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/pandas/core/common.py\u001b[0m in \u001b[0;36mstandardize_mapping\u001b[0;34m(into)\u001b[0m\n\u001b[1;32m    387\u001b[0m         \u001b[0minto\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minto\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    388\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0missubclass\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minto\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMapping\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 389\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"unsupported type: {into}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    390\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0minto\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdefaultdict\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    391\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"to_dict() only accepts initialized defaultdicts\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: unsupported type: <class 'str'>"
     ]
    }
   ],
   "source": [
    "day.loc[460079768010752,:].to_dict('list')\n",
    "# 直接这样报错，原因是数据类型是Series，转换成DF即可"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>460079768010752</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>device_id_</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DX-SZSC001</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LT-SZSC001</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>YD-SZSC001</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            460079768010752\n",
       "device_id_                 \n",
       "day                      23\n",
       "DX-SZSC001                0\n",
       "LT-SZSC001                0\n",
       "YD-SZSC001                3"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "day.loc[460079768010752,:].to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{460079768010752: {'day': [23],\n",
       "  'DX-SZSC001': [0],\n",
       "  'LT-SZSC001': [0],\n",
       "  'YD-SZSC001': [3]}}"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = {}\n",
    "d[460079768010752] = {}\n",
    "d[460079768010752].update(day.loc[460079768010752,:].to_frame().T.to_dict('list'))\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    raise AttributeError\n",
    "except (TypeError,AttributeError) as reason:\n",
    "    if isinstance(reason, TypeError):\n",
    "        print(1)\n",
    "    else:\n",
    "        print(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>device_id_</th>\n",
       "      <th>day</th>\n",
       "      <th>DX-SZSC001</th>\n",
       "      <th>LT-SZSC001</th>\n",
       "      <th>YD-SZSC001</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>imsi_</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222992312514644</th>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "device_id_       day  DX-SZSC001  LT-SZSC001  YD-SZSC001\n",
       "imsi_                                                   \n",
       "222992312514644    8           0           1           0\n",
       "222992312514644    9           0           2           0\n",
       "222992312514644   10           0           2           0\n",
       "222992312514644   11           0           2           0\n",
       "222992312514644   13           0           4           0"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "day.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['day', 'DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'], dtype='object', name='device_id_')"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "day.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [],
   "source": [
    "userid = list(set(day.index))\n",
    "d = {}\n",
    "for each in userid:\n",
    "    d[each] = {}\n",
    "    try:\n",
    "        d[each].update(day.loc[each,:].to_dict('list'))\n",
    "    except (TypeError, AttributeError) as r:\n",
    "        if isinstance(r, TypeError):\n",
    "            try:\n",
    "                d[each].update(day.loc[each,:].to_frame().T.to_dict('list'))\n",
    "            except:\n",
    "                print(f'{each} is missing...')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "unsupported type: <class 'str'>",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-145-765cc72be3dd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m460001092568480\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mday\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m460001092568480\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'list'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mto_dict\u001b[0;34m(self, into)\u001b[0m\n\u001b[1;32m   1558\u001b[0m         \"\"\"\n\u001b[1;32m   1559\u001b[0m         \u001b[0;31m# GH16122\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1560\u001b[0;31m         \u001b[0minto_c\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstandardize_mapping\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minto\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1561\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0minto_c\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1562\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/lib/python3.8/site-packages/pandas/core/common.py\u001b[0m in \u001b[0;36mstandardize_mapping\u001b[0;34m(into)\u001b[0m\n\u001b[1;32m    387\u001b[0m         \u001b[0minto\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minto\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    388\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0missubclass\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minto\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMapping\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 389\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"unsupported type: {into}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    390\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0minto\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdefaultdict\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    391\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"to_dict() only accepts initialized defaultdicts\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: unsupported type: <class 'str'>"
     ]
    }
   ],
   "source": [
    "d[460001092568480].update(day.loc[460001092568480,:].to_dict('list'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{460001092568480: [11, 0, 0, 3]}"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = {}\n",
    "d[460001092568480] = {}\n",
    "pd.DataFrame(day.loc[460001092568480,:]).to_dict('list')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{460001092568480: [11, 0, 0, 3]}"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(day.loc[460001092568480,:]).to_dict('list')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(set(day.index)) == list(set(week.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
